{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Working with QDA - a nonlinear LDA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "from pandas_datareader import data\n",
    "import pandas as pd\n",
    "import patsy\n",
    "\n",
    "tickers = [\"F\", \"TM\", \"GM\", \"TSLA\"]\n",
    "\n",
    "first_date = '2009-01-01'\n",
    "last_date = '2016-12-31'\n",
    "\n",
    "# Number of rows to look ahead when deciding whether a close ends up higher later.\n",
    "horizon = 180\n",
    "\n",
    "# NOTE(review): the 'google' source and the Panel-style result (stock_panel.Close,\n",
    "# stock_panel.to_frame()) are tied to older pandas-datareader / pandas releases --\n",
    "# confirm they still work in the target environment.\n",
    "stock_panel = data.DataReader(tickers, 'google', first_date, last_date)\n",
    "\n",
    "# Keep only the dates on which every ticker has a closing price.\n",
    "stock_df = stock_panel.Close.dropna()\n",
    "\n",
    "# Label is 1 when the close `horizon` rows later is above the current close, else 0.\n",
    "# The trailing `horizon` rows have no future close; a comparison against NaN is False,\n",
    "# which would silently label them 0, so those rows are excluded from the labels.\n",
    "future_close = stock_df.shift(-horizon)\n",
    "has_future = future_close.notnull().all(axis=1)\n",
    "classes = (future_close > stock_df).loc[has_future].astype(int)\n",
    "\n",
    "# Reshape the labels to one row per (Date, ticker) so they can be joined to the prices.\n",
    "classes = classes.unstack()\n",
    "classes = classes.swaplevel(0, 1).sort_index()\n",
    "classes = classes.to_frame()\n",
    "classes.index.names = ['Date', 'minor']\n",
    "\n",
    "# Left join, then dropna(): rows without a label or with missing prices are removed.\n",
    "ohlcv_long = stock_panel.to_frame()\n",
    "labeled_prices = (\n",
    "    ohlcv_long\n",
    "    .join(classes)\n",
    "    .dropna()\n",
    "    .rename(columns={0: 'is_higher'})\n",
    ")\n",
    "\n",
    "# Design matrix without an intercept. is_higher is listed last on purpose: the cells\n",
    "# below treat the last column as the label and every other column as a feature.\n",
    "X = patsy.dmatrix(\n",
    "    \"Open + High + Low + Close + Volume + is_higher - 1\",\n",
    "    labeled_prices.reset_index(),\n",
    "    return_type='dataframe',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "23.0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA\n",
    "\n",
    "# Every column except the last is a feature; the last column holds the label.\n",
    "features = X.iloc[:, :-1]\n",
    "target = X.iloc[:, -1]\n",
    "\n",
    "# fit() returns the estimator itself, so construction and fitting can be chained.\n",
    "qda = QDA().fit(features, target)\n",
    "\n",
    "# In-sample predictions on the same rows the model was fitted on.\n",
    "predictions = qda.predict(features)\n",
    "\n",
    "# Sum of the predicted labels.\n",
    "predictions.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "             precision    recall  f1-score   support\n",
      "\n",
      "        0.0       0.86      0.99      0.92       845\n",
      "        1.0       0.70      0.10      0.18       156\n",
      "\n",
      "avg / total       0.83      0.85      0.80      1001\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report\n",
    "\n",
    "# Compare the in-sample predictions with the true labels (last column of X).\n",
    "# print(...) with a single argument works on both Python 2 and Python 3,\n",
    "# unlike the Python-2-only print statement.\n",
    "print(classification_report(X.iloc[:, -1].values, predictions))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import ShuffleSplit\n",
    "import scipy.stats as sp\n",
    "\n",
    "# Fixed seed so the random splits, and therefore the number below, are reproducible.\n",
    "shuffle_split_inst = ShuffleSplit(random_state=0)\n",
    "\n",
    "# Per-split fraction of held-out rows that are more likely under class 0.\n",
    "class_0_fractions = []\n",
    "\n",
    "# split() yields (train_indices, test_indices) -- in that order.\n",
    "for train, test in shuffle_split_inst.split(X):\n",
    "    train_set = X.iloc[train]\n",
    "    train_close = train_set.Close\n",
    "    train_is_higher = train_set.is_higher.astype(bool)\n",
    "\n",
    "    train_0 = train_close[~train_is_higher]\n",
    "    train_1 = train_close[train_is_higher]\n",
    "\n",
    "    test_close = X.iloc[test].Close.values\n",
    "\n",
    "    # Density of each held-out close under a normal centred on each class mean.\n",
    "    # NOTE(review): scale is left at scipy's default of 1.0, so only the class means\n",
    "    # differ between the two densities -- consider passing the per-class std as well.\n",
    "    ll_0 = sp.norm.pdf(test_close, train_0.mean())\n",
    "    ll_1 = sp.norm.pdf(test_close, train_1.mean())\n",
    "\n",
    "    # Evaluate inside the loop so every split contributes, not just the last one.\n",
    "    class_0_fractions.append((ll_0 > ll_1).mean())\n",
    "\n",
    "# Average over all splits of the share of held-out rows assigned to class 0.\n",
    "sum(class_0_fractions) / float(len(class_0_fractions))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
